{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import tensorflow as tf\n",
    "\n",
    "from keras.layers import Dense, Input, LSTM, Embedding, Dropout, Activation\n",
    "from keras.layers import Bidirectional, GlobalMaxPool1D,Bidirectional\n",
    "from keras.models import Model\n",
    "from keras.layers import TimeDistributed\n",
    "from keras.layers import concatenate\n",
    "from keras.layers import Dense, Input\n",
    "from keras.optimizers import Adam\n",
    "from keras.models import Model\n",
    "from keras.layers import CuDNNLSTM\n",
    "import gc\n",
    "import os"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Data loading"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "x_train = np.load('/home/tidal/ML_Data/Tweet_Sentiment_Extraction/features/x_train.npy')\n",
    "x_test = np.load('/home/tidal/ML_Data/Tweet_Sentiment_Extraction/features/x_test.npy')\n",
    "sent_train = np.load('/home/tidal/ML_Data/Tweet_Sentiment_Extraction/features/sent_train.npy')\n",
    "sent_test = np.load('/home/tidal/ML_Data/Tweet_Sentiment_Extraction/features/sent_test.npy')\n",
    "targets = np.load('/home/tidal/ML_Data/Tweet_Sentiment_Extraction/targets/targets.npy')\n",
    "embedding_matrix_ = np.load('/home/tidal/ML_Data/Tweet_Sentiment_Extraction/transformer_layer/embedding_matrix.npy')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-0.01664949, -0.06661227, -0.01632868, ..., -0.01999032,\n",
       "        -0.05139988, -0.0263568 ],\n",
       "       [-0.01319846, -0.06733431, -0.01605646, ..., -0.0226614 ,\n",
       "        -0.05537301, -0.02600443],\n",
       "       [-0.01759106, -0.07094341, -0.01443494, ..., -0.02457913,\n",
       "        -0.05956192, -0.0231829 ],\n",
       "       ...,\n",
       "       [-0.0231029 , -0.05878259, -0.01048967, ..., -0.01945743,\n",
       "        -0.02615411, -0.02118432],\n",
       "       [-0.0490171 , -0.05614787, -0.00465348, ..., -0.01065376,\n",
       "        -0.01797333, -0.02187675],\n",
       "       [-0.00646111, -0.0914881 , -0.00254872, ..., -0.01505679,\n",
       "        -0.05040044,  0.04597744]], dtype=float32)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "embedding_matrix_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Fitting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def new_model(embedding_matrix):\n",
    "    \n",
    "    inp = Input(shape=(128, ))\n",
    "    inp2= Input(shape=(1,))\n",
    "\n",
    "    x = Embedding(embedding_matrix.shape[0], embedding_matrix.shape[1],weights=[embedding_matrix],trainable=False)(inp)\n",
    "\n",
    "    x = CuDNNLSTM(150, return_sequences=True,name='lstm_layer',)(x)\n",
    "    x = CuDNNLSTM(100, return_sequences=False,name='lstm_layer-2',)(x)\n",
    "    \n",
    "    y =Dense(10,activation='relu')(inp2)\n",
    "    x= concatenate([x,y])\n",
    "    \n",
    "    x = Dense(108,activation='sigmoid')(x)\n",
    "\n",
    "    model = Model(inputs=[inp,inp2], outputs=x)\n",
    "\n",
    "    model.compile(loss='binary_crossentropy',\n",
    "                      optimizer='adam')\n",
    "\n",
    "\n",
    "    #print(model.summary())\n",
    "    \n",
    "    return model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "__________________________________________________________________________________________________\n",
      "Layer (type)                    Output Shape         Param #     Connected to                     \n",
      "==================================================================================================\n",
      "input_1 (InputLayer)            (None, 128)          0                                            \n",
      "__________________________________________________________________________________________________\n",
      "embedding_1 (Embedding)         (None, 128, 768)     23440896    input_1[0][0]                    \n",
      "__________________________________________________________________________________________________\n",
      "lstm_layer (CuDNNLSTM)          (None, 128, 150)     552000      embedding_1[0][0]                \n",
      "__________________________________________________________________________________________________\n",
      "input_2 (InputLayer)            (None, 1)            0                                            \n",
      "__________________________________________________________________________________________________\n",
      "lstm_layer-2 (CuDNNLSTM)        (None, 100)          100800      lstm_layer[0][0]                 \n",
      "__________________________________________________________________________________________________\n",
      "dense_1 (Dense)                 (None, 10)           20          input_2[0][0]                    \n",
      "__________________________________________________________________________________________________\n",
      "concatenate_1 (Concatenate)     (None, 110)          0           lstm_layer-2[0][0]               \n",
      "                                                                 dense_1[0][0]                    \n",
      "__________________________________________________________________________________________________\n",
      "dense_2 (Dense)                 (None, 108)          11988       concatenate_1[0][0]              \n",
      "==================================================================================================\n",
      "Total params: 24,105,704\n",
      "Trainable params: 664,808\n",
      "Non-trainable params: 23,440,896\n",
      "__________________________________________________________________________________________________\n",
      "None\n",
      "Epoch 1/5\n",
      "27486/27486 [==============================] - 15s 551us/step - loss: 0.1994\n",
      "Epoch 2/5\n",
      "27486/27486 [==============================] - 14s 502us/step - loss: 0.1828\n",
      "Epoch 3/5\n",
      "27486/27486 [==============================] - 14s 511us/step - loss: 0.1727\n",
      "Epoch 4/5\n",
      "27486/27486 [==============================] - 14s 510us/step - loss: 0.1677\n",
      "Epoch 5/5\n",
      "27486/27486 [==============================] - 14s 519us/step - loss: 0.1661\n",
      "CPU times: user 1min 18s, sys: 3.14 s, total: 1min 21s\n",
      "Wall time: 1min 14s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "model=new_model(embedding_matrix_)\n",
    "history=model.fit([x_train,sent_train],targets,epochs=5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 予測"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[4.0948644e-01, 3.4232140e-01, 3.1326446e-01, ..., 5.0406601e-05,\n",
       "        1.4211882e-05, 3.3758883e-05],\n",
       "       [4.0948644e-01, 3.4232140e-01, 3.1326446e-01, ..., 5.0406601e-05,\n",
       "        1.4211882e-05, 3.3758883e-05],\n",
       "       [4.0948644e-01, 3.4232140e-01, 3.1326446e-01, ..., 5.0406601e-05,\n",
       "        1.4211882e-05, 3.3758883e-05],\n",
       "       ...,\n",
       "       [8.9933556e-01, 8.8674951e-01, 8.7265652e-01, ..., 7.9718608e-05,\n",
       "        3.2823744e-05, 4.9186467e-05],\n",
       "       [3.8082889e-01, 3.7072879e-01, 3.6704493e-01, ..., 1.3675296e-04,\n",
       "        6.0742277e-05, 5.4934149e-05],\n",
       "       [8.9933556e-01, 8.8674951e-01, 8.7265652e-01, ..., 7.9718608e-05,\n",
       "        3.2823744e-05, 4.9186467e-05]], dtype=float32)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "predictions=model.predict([x_test,sent_test])\n",
    "predictions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "def convert_output(sub,predictions):\n",
    "    preds=[]\n",
    "    for i,row in enumerate(sub['text']):\n",
    "\n",
    "        text,target=row.lower(),predictions[i].tolist()\n",
    "        target=np.round(target).tolist()\n",
    "        try:\n",
    "            start,end=target.index(1),target[::-1].index(1)\n",
    "            text_list=tokenizer.tokenize(text)\n",
    "            text_list=text_list+((108-len(text_list))*['pad'])\n",
    "            start_w,end_w=text_list[start],text_list[-end]\n",
    "            start=text.find(start_w.replace(\"#\",'',1))    ## remove # to match substring\n",
    "            end=text.find(end_w.replace(\"#\",''),start)\n",
    "            #pred=' '.join([x for x in text_list[start:-end]])\n",
    "            pred=text[start:end]\n",
    "        except:\n",
    "            pred=text\n",
    "        \n",
    "        preds.append(pred)\n",
    "        \n",
    "    return preds\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3535"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_df = pd.read_csv('/home/tidal/ML_Data/Tweet_Sentiment_Extraction/test.csv')\n",
    "prediction_text=convert_output(test_df,predictions)\n",
    "len(prediction_text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "sub=pd.read_csv(\"/home/tidal/ML_Data/Tweet_Sentiment_Extraction/sample_submission.csv\")\n",
    "sub['selected_text']=prediction_text\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# neutralだけそのままにしてみる -②"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [],
   "source": [
    "def jaccard(str1, str2): \n",
    "    a = set(str1.lower().split()) \n",
    "    b = set(str2.lower().split())\n",
    "    c = a.intersection(b)\n",
    "    return float(len(c)) / (len(a) + len(b) - len(c))\n",
    "\n",
    "def jaccard_score(txt_list1, txt_list2):\n",
    "    jcc_score = 0\n",
    "    for txt1, txt2 in zip(txt_list1, txt_list2):\n",
    "        jcc_score += jaccard(txt1,txt2)\n",
    "        #print(jcc_score)\n",
    "\n",
    "    jcc_score = jcc_score/len(txt_list1)\n",
    "    return jcc_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.4666666666666667"
      ]
     },
     "execution_count": 88,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "jaccard_score([\"A B C D\",\"Q A Z W\"],[\"A C D H\",\"Q A T G\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>textID</th>\n",
       "      <th>selected_text</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>11aa4945ff</td>\n",
       "      <td>http://twitpic.com/67swx - i wish i was calli...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>fd1db57dc0</td>\n",
       "      <td>i'm done.haha. house md marathon ulet</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2524332d66</td>\n",
       "      <td>i'm concerned for that family</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0fb19285b2</td>\n",
       "      <td>hey guys it's working no need to worry. i have...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>e6c9e5e3ab</td>\n",
       "      <td>26th february</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       textID                                      selected_text\n",
       "0  11aa4945ff   http://twitpic.com/67swx - i wish i was calli...\n",
       "1  fd1db57dc0              i'm done.haha. house md marathon ulet\n",
       "2  2524332d66                      i'm concerned for that family\n",
       "3  0fb19285b2  hey guys it's working no need to worry. i have...\n",
       "4  e6c9e5e3ab                                      26th february"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sub.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>textID</th>\n",
       "      <th>text</th>\n",
       "      <th>sentiment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>e6c9e5e3ab</td>\n",
       "      <td>26th February</td>\n",
       "      <td>neutral</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>c6322a85c2</td>\n",
       "      <td>_Layne  hmm.. what's ur fav movie?? tv shows??</td>\n",
       "      <td>neutral</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>934ac753a2</td>\n",
       "      <td>Nope, I'm on my way home now.</td>\n",
       "      <td>neutral</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>a8e2c45625</td>\n",
       "      <td>In Study Hall with Brittneyy!!</td>\n",
       "      <td>neutral</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>d761530c7f</td>\n",
       "      <td>is working until 11 tonight  http://plurk.com/...</td>\n",
       "      <td>neutral</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>d5c25dbc63</td>\n",
       "      <td>still need to see that movie ! I am all abou...</td>\n",
       "      <td>neutral</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>7a897c9999</td>\n",
       "      <td>http://twitpic.com/4wptj one of my prized mag...</td>\n",
       "      <td>neutral</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>4d7def819d</td>\n",
       "      <td>Aw, sorry E. :/ I hope it looks up for you (l...</td>\n",
       "      <td>neutral</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>390f169584</td>\n",
       "      <td>im a lady and ladies don't move couches. they...</td>\n",
       "      <td>neutral</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>0de656eac1</td>\n",
       "      <td>**** fiesta at my house in my mouth  hit me up...</td>\n",
       "      <td>neutral</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        textID                                               text sentiment\n",
       "4   e6c9e5e3ab                                      26th February   neutral\n",
       "8   c6322a85c2     _Layne  hmm.. what's ur fav movie?? tv shows??   neutral\n",
       "10  934ac753a2                      Nope, I'm on my way home now.   neutral\n",
       "12  a8e2c45625                     In Study Hall with Brittneyy!!   neutral\n",
       "13  d761530c7f  is working until 11 tonight  http://plurk.com/...   neutral\n",
       "16  d5c25dbc63    still need to see that movie ! I am all abou...   neutral\n",
       "17  7a897c9999   http://twitpic.com/4wptj one of my prized mag...   neutral\n",
       "18  4d7def819d   Aw, sorry E. :/ I hope it looks up for you (l...   neutral\n",
       "24  390f169584   im a lady and ladies don't move couches. they...   neutral\n",
       "25  0de656eac1  **** fiesta at my house in my mouth  hit me up...   neutral"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_neutral = test_df[test_df['sentiment'] == 'neutral']\n",
    "test_neutral.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "is working until 11 tonight  http://plurk.com/p/x2kbv\n",
      "is working until 11 tonight  http://plurk.com/p/x2kbv\n",
      "1.0\n"
     ]
    }
   ],
   "source": [
    "test_text_list = test_df.loc[:,'text']\n",
    "sub_text_list = sub.loc[:,'selected_text']\n",
    "print(test_text_list[13])\n",
    "print(sub_text_list[13])\n",
    "print(jaccard(test_text_list[25],sub_text_list[25]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>textID</th>\n",
       "      <th>text</th>\n",
       "      <th>sentiment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>e6c9e5e3ab</td>\n",
       "      <td>26th February</td>\n",
       "      <td>neutral</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       textID            text sentiment\n",
       "4  e6c9e5e3ab   26th February   neutral"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_neutral[test_neutral['textID'] == \"e6c9e5e3ab\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          textID                                      selected_text\n",
      "4     e6c9e5e3ab                                      26th february\n",
      "8     c6322a85c2     _layne  hmm.. what's ur fav movie?? tv shows??\n",
      "10    934ac753a2                      nope, i'm on my way home now.\n",
      "12    a8e2c45625                     in study hall with brittneyy!!\n",
      "13    d761530c7f  is working until 11 tonight  http://plurk.com/...\n",
      "...          ...                                                ...\n",
      "3523  eabaf375f4  i must have viewed that picture of me and  fro...\n",
      "3526  885b8f47f7                                yes sir i sure did.\n",
      "3529  44daf50a02               paper writing til my heart's content\n",
      "3532  08f6036add  invasion of the old ladies has just ended, sti...\n",
      "3534  aa1a163174                                            will do\n",
      "\n",
      "[1430 rows x 2 columns]\n"
     ]
    }
   ],
   "source": [
    "sub_neutral = pd.DataFrame()\n",
    "for test_textID in test_neutral['textID']:\n",
    "    sub_neutral = sub_neutral.append(sub[sub['textID'] == test_textID])\n",
    "\n",
    "print(sub_neutral)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "<class 'pandas.core.frame.DataFrame'>\n"
     ]
    }
   ],
   "source": [
    "print(type(sub_neutral))\n",
    "print(type(test_neutral))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.0\n"
     ]
    }
   ],
   "source": [
    "test_text_list_ntl = test_neutral.loc[:,'text']\n",
    "sub_text_list_ntl = sub_neutral.loc[:,'selected_text']\n",
    "\n",
    "print(jaccard_score(test_text_list_ntl, sub_text_list_ntl))\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.0\n"
     ]
    }
   ],
   "source": [
    "test_text_list = test_df.loc[:,'text']\n",
    "sub_text_list = sub.loc[:,'selected_text']\n",
    "\n",
    "print(jaccard_score(test_text_list, sub_text_list))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# ここまで -②"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>textID</th>\n",
       "      <th>selected_text</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>11aa4945ff</td>\n",
       "      <td>http://twitpic.com/67swx - i wish i was calli...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>fd1db57dc0</td>\n",
       "      <td>i'm done.haha. house md marathon ulet</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2524332d66</td>\n",
       "      <td>i'm concerned for that family</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0fb19285b2</td>\n",
       "      <td>hey guys it's working no need to worry. i have...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>e6c9e5e3ab</td>\n",
       "      <td>26th february</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       textID                                      selected_text\n",
       "0  11aa4945ff   http://twitpic.com/67swx - i wish i was calli...\n",
       "1  fd1db57dc0              i'm done.haha. house md marathon ulet\n",
       "2  2524332d66                      i'm concerned for that family\n",
       "3  0fb19285b2  hey guys it's working no need to worry. i have...\n",
       "4  e6c9e5e3ab                                      26th february"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sub.to_csv('/home/tidal/ML_Data/Tweet_Sentiment_Extraction/submit/submission.csv',index=False)\n",
    "sub.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "TensorFlow-GPU",
   "language": "python",
   "name": "tf-gpu"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
